In [2]:
import pandas as pd
In [1]:
import numpy as np
from collections import OrderedDict
from bokeh.charts import Scatter
from bokeh.charts import Bar
from bokeh.charts import Histogram
from bokeh.plotting import output_notebook, show
output_notebook()
import datavis as dv
In [61]:
from bokeh.palettes import brewer
from bokeh.models import LinearAxis, Range1d
from bokeh.plotting import *
In [4]:
# Load the pickled "gfti" frame.
# NOTE(review): the following cell rebinds `gfti` to profiles2.pkl, so on a
# top-to-bottom run this load is overwritten before anything reads it.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
gfti = pd.read_pickle("../CDPdata/gfti.pkl")
In [265]:
# Bind `gfti` to the profiles2 pickle; in file order this is the frame every
# later cell that reads `gfti` will see.
gfti = pd.read_pickle("../CDPdata/profiles2.pkl")
In [266]:
# Show the column labels of the loaded frame.
gfti.columns
Out[266]:
In [267]:
# `scopeg` (the per-year grouping) feeds both the disclosure counts and the totals.
# Rows are indexed by organisation and restricted to 2009-2013, which drops 2008.
by_org = gfti.set_index("Organisation")
sc = by_org[by_org["year"].isin(range(2009, 2014))]
scopeg = sc.groupby("year")
In [268]:
# Yearly counts: keep only the "count" rows produced by describe() and expose
# the scope1 count under the column name "n".
year_stats = scopeg.describe().reset_index()
count_rows = year_stats["level_1"] == "count"
scope12_dis = year_stats.loc[count_rows, ["year", "scope1"]].rename(columns={"scope1": "n"})
In [269]:
# Yearly sums indexed by year, with both scope columns converted from tons to Mt CO2e.
yearly_totals = scopeg.sum().reset_index().set_index("year")
for col in ("scope1", "scope2"):
    yearly_totals[col] = yearly_totals[col] / 1000000
# Attach the totals to the yearly counts.
# NOTE: `scope12_dis` is rebound here, so re-running this cell on its own fails
# because "year" has already been moved into the index.
scope12_dis = scope12_dis.set_index("year").join(yearly_totals)
In [270]:
# Display the joined yearly counts and totals.
scope12_dis
Out[270]:
In [243]:
# Year labels as strings, in the order they appear in the scope12_dis index.
years = [str(i) for i in scope12_dis.index]
In [244]:
# Series for the bar chart, keyed by legend label: the yearly company counts.
values12 = OrderedDict(
    [("Companies That Disclosed That Year", scope12_dis["n"].values)]
)
# Emission series are currently disabled for this chart:
# values12["Scope 1"]= scope12_dis["scope1"].values
# values12["Scope 2"]= scope12_dis["scope2"].values
In [245]:
# Non-stacked bar chart of the number of companies per year.
bar = Bar(
    values12,
    years,
    title="Companies in the Sample",
    filename="scope12_disclosure.html",
    stacked=False,
    ylabel="n",
    xlabel="Year",
    palette=brewer["Spectral"][7],
)
# Disabled experiments with a fixed y range and a secondary "Mt CO2e" axis:
# yend = 10000
# bar.y_range.end = 6000
# bar.y_range.end = 2000
# bar.extra_y_ranges = {"Mtons": Range1d(start=0, end=yend)}
# bar.add_layout(LinearAxis(y_range_name="Mtons", axis_label="Mt CO2e"), 'right')
# bar.show()
In [246]:
# Render the company-count bar chart.
show(bar)
In [271]:
# Work on an independent copy. Later cells overwrite the "Sector", "scope1" and
# "scope2" columns of `scope12`; with a plain alias those writes would also
# change `sc` (the frame behind `scopeg`), and because `sc` is itself a filtered
# selection they would raise pandas' SettingWithCopyWarning.
scope12 = sc.copy()
In [438]:
# Number of distinct organisations in each sector.
sectors_groups = scope12.reset_index().groupby("Sector")
sector_ns = sectors_groups.apply(lambda grp: grp["Organisation"].nunique())
In [440]:
# Persist the per-sector organisation counts as CSV.
sector_ns.to_csv("../CDPdata/sectorns.csv")
In [441]:
# Number of distinct organisations in each country.
c_groups = scope12.reset_index().groupby("Country")
c_ns = c_groups.apply(lambda grp: grp["Organisation"].nunique())
In [442]:
# Persist the per-country organisation counts as CSV.
c_ns.to_csv("../CDPdata/countryns.csv")
In [364]:
# Shorten the telecom sector label; every other sector value is left unchanged.
scope12["Sector"] = scope12["Sector"].replace("Telecommunication Services", "Telecoms")
In [366]:
# Divide both emission columns by 1,000,000 (the totals cell labels the same
# conversion "tons to Mt CO2e").
# NOTE: the columns are overwritten, so running this cell twice divides twice.
for scope_col in ("scope1", "scope2"):
    scope12[scope_col] = scope12[scope_col] / 1000000
In [367]:
# Group the frame at three granularities: year x country x sector,
# year x country, and year x sector.
scope12_cs = scope12.groupby(["year","Country","Sector"])
scope12_c = scope12.groupby(["year", "Country"])
scope12_s = scope12.groupby(["year", "Sector"])
In [368]:
def _sums_by_year(grouped):
    """Sum each group, then flatten the group keys and index the result by 'year' only."""
    return grouped.sum().reset_index().set_index('year')


# Disclosed emission sums per (year, country) and per (year, sector).
scope12_csums = _sums_by_year(scope12_c)
scope12_ssums = _sums_by_year(scope12_s)
In [369]:
# Reporting counts per (year, sector): keep only the "count" rows of describe()
# and index the result by year.
sector_stats = scope12_s.describe().reset_index()
sector_count_rows = sector_stats["level_2"] == "count"
scope12_sdis = sector_stats.loc[
    sector_count_rows, ["year", "Sector", "scope1", "scope2"]
].set_index("year")
In [370]:
# Reporting counts per (year, country): keep only the "count" rows of describe()
# and index the result by year.
country_stats = scope12_c.describe().reset_index()
country_count_rows = country_stats["level_2"] == "count"
scope12_cdis = country_stats.loc[
    country_count_rows, ["year", "Country", "scope1", "scope2"]
].set_index("year")
In [371]:
# Preview the per-sector reporting counts.
scope12_sdis.head()
Out[371]:
In [372]:
# Countries and sectors ordered by their total scope1 value, largest first.
# NOTE: the two cells that follow rebind `sectors` and `countries`, so in file
# order these orderings are replaced before they are used.
country_totals = scope12.reset_index().groupby("Country").sum().sort("scope1", ascending=False)
sector_totals = scope12.reset_index().groupby("Sector").sum().sort("scope1", ascending=False)
countries = country_totals.index.tolist()
sectors = sector_totals.index.tolist()
In [373]:
# Sector order for the charts: the 2010 rows ranked by their scope2 count, largest first.
sdis_2010 = scope12_sdis.loc[2010]
sectors = sdis_2010.sort("scope2", ascending=False)["Sector"].tolist()
# Alternative ordering (disabled):
# sectors = scope12_sdis["Sector"].value_counts().index
In [390]:
# Country order for the charts: the 2010 rows ranked by their scope2 count, largest first.
cdis_2010 = scope12_cdis.loc[2010]
countries = cdis_2010.sort("scope2", ascending=False)["Country"].tolist()
In [392]:
# Split the emission sums by category, once per scope and per grouping
# (country / sector); missing values are replaced with 0.
# NOTE(review): dv.separate_cats lives in the local datavis module (not shown);
# presumably it yields one column per listed category — confirm.
scope1_csums_cats = dv.separate_cats(scope12_csums, "Country", countries, "scope1").fillna(0)
scope1_ssums_cats = dv.separate_cats(scope12_ssums, "Sector", sectors, "scope1").fillna(0)
scope2_csums_cats = dv.separate_cats(scope12_csums, "Country", countries, "scope2").fillna(0)
scope2_ssums_cats = dv.separate_cats(scope12_ssums, "Sector", sectors, "scope2").fillna(0)
In [401]:
# Inspect the per-country disclosure table.
# NOTE(review): in file order `scope12_cdis_cats` is first assigned in the cell
# below, so this cell raises NameError on a fresh top-to-bottom run.
scope12_cdis_cats
Out[401]:
In [393]:
# Same category split as for the sums, applied to the disclosure-count frames
# (the helper works on either the dis(closure) or the sums data).
# Only the scope1 counts are used; missing values are replaced with 0.
scope12_cdis_cats = dv.separate_cats(scope12_cdis, "Country", countries, "scope1").fillna(0)
scope12_sdis_cats = dv.separate_cats(scope12_sdis, "Sector", sectors, "scope1").fillna(0)
In [111]:
# Stacked area chart of Scope 1 emissions by country.
# Brewer palettes top out at 11 colors, so the 11-color Spectral set is used.
s1_csums_vals = dv.stacked_cols(scope1_csums_cats, countries)
# s1_ssums_vals = dv.stacked_cols(scope1_ssums_cats, sectors)
# colors = brewer["Spectral"][len(sectors)]
colors = brewer["Spectral"][11]

# x coordinates shared by every polygon: the years descending, then ascending.
plot_years = range(2009, 2014)
x2 = np.hstack((plot_years[::-1], plot_years))

title = "Scope 1 CO2e Disclosed by Country"
# title = "Scope 1 CO2e Disclosed by Sector"
# title = "Companies Disclosing Scope 1 by Country"
s = figure(title=title)
s.patches(
    [x2 for series in s1_csums_vals],
    list(s1_csums_vals.values()),
    color=colors,
    alpha=0.8,
    line_color=None,
)
Out[111]:
In [385]:
# Prepare the per-category emission sums for the stacked bar charts, once per
# scope and grouping.
# NOTE(review): dv.prep_stacked_bar lives in the local datavis module (not
# shown); its return shape is not visible here.
s1_csums_vs = dv.prep_stacked_bar(scope1_csums_cats, countries)
s2_csums_vs = dv.prep_stacked_bar(scope2_csums_cats, countries)
s1_ssums_vs = dv.prep_stacked_bar(scope1_ssums_cats, sectors)
s2_ssums_vs = dv.prep_stacked_bar(scope2_ssums_cats, sectors)
In [402]:
# Prepare the per-category disclosure counts for the stacked bar charts.
s12_cdis_vs = dv.prep_stacked_bar(scope12_cdis_cats, countries)
s12_sdis_vs = dv.prep_stacked_bar(scope12_sdis_cats, sectors)
In [384]:
# Re-import the local datavis module.
# NOTE(review): bare `reload` is the Python 2 builtin. In file order this runs
# after the dv.prep_stacked_bar cells above, so on a fresh run it does not
# affect them.
reload(dv)
Out[384]:
In [394]:
def _emissions_bar(values, title):
    """Build a stacked yearly Bar chart of emissions (Mt CO2e) with the shared styling.

    Two empty category labels are appended to the years on every call
    (presumably to leave room for the bottom-right legend; confirm).
    """
    return Bar(values, years + ["", ""], title=title, legend="bottom_right",
               stacked=True, ylabel="Mt CO2e", xlabel="Year", palette=brewer["Spectral"][11])


s1cbar = _emissions_bar(s1_csums_vs, "Scope 1 CO2e Disclosed by Country")
s2cbar = _emissions_bar(s2_csums_vs, "Scope 2 CO2e Disclosed by Country")
s1sbar = _emissions_bar(s1_ssums_vs, "Scope 1 CO2e Disclosed by Sector")
s2sbar = _emissions_bar(s2_ssums_vs, "Scope 2 CO2e Disclosed by Sector")
In [403]:
# Styling shared by both disclosure-count charts (stacked, y axis labelled "n").
disclosure_bar_opts = dict(legend="bottom_right", stacked=True, ylabel="n",
                           xlabel="Year", palette=brewer["Spectral"][11])

# The year labels are padded with two empty categories on each call.
s12cdbar = Bar(s12_cdis_vs, years + ["", ""],
               title="Companies That Disclosed by Country", **disclosure_bar_opts)
s12sdbar = Bar(s12_sdis_vs, years + ["", ""],
               title="Companies That Disclosed by Sector", **disclosure_bar_opts)
In [400]:
# Print the country ordering used by the charts (the parenthesised form prints
# the same text under the Python 2 print statement).
print(countries)
In [404]:
# Render the disclosures-by-country stacked bar chart.
show(s12cdbar)
In [283]:
# Keep only rows that have a "pcintensity" value. The explicit copy makes
# `gftip` independent of `gfti`, so adding columns to it later neither touches
# the source frame nor raises pandas' SettingWithCopyWarning.
gftip = gfti[gfti["pcintensity"].notnull()].copy()
In [287]:
# Add "pc100": the pcintensity value multiplied by 100.
# NOTE(review): presumably pcintensity is a fraction and pc100 a percentage — confirm.
gftip["pc100"] = gftip["pcintensity"]*100
In [300]:
# Per-year descriptive statistics of pc100.
summary = gftip[['year','pc100']].groupby("year").describe()
In [312]:
# Re-key the summary by (statistic, year) once, then pull out the per-year
# mean and median ("50%") rows.
stats_by_year = summary.reset_index().set_index(["level_1", "year"]).sort_index()
mean = stats_by_year.loc["mean"]
median = stats_by_year.loc["50%"]
In [314]:
# Name each statistic's value column after the statistic so the two frames can
# be joined without a column clash.
mean = mean.rename(columns={"pc100": "mean"})
median = median.rename(columns={"pc100": "median"})
In [320]:
# Combine the per-year mean and median into one frame with a positional index.
pc_avgs= mean.join(median).reset_index()
In [321]:
# Persist the yearly mean/median table as CSV.
pc_avgs.to_csv("../CDPdata/pc_avgs.csv")
In [25]:
# Prepare "pcintensity" for histogramming with bounds -1 and 1.
# NOTE(review): dv.prep_forhist lives in the local datavis module (not shown);
# presumably it clips or filters to the given bounds — confirm.
# NOTE(review): this cell and the next both rebind `gftip`, so in file order
# the two calls are applied one after the other.
gftip = dv.prep_forhist(gftip, "pcintensity", -1, 1)
In [322]:
# Prepare "pc100" for histogramming with bounds -100 and 100.
# NOTE(review): what dv.prep_forhist does with the bounds is not visible here.
gftip = dv.prep_forhist(gftip, "pc100", -100, 100)
In [323]:
# Index the rows by year so the per-year lookups below can use .loc[yr].
gftip = gftip.set_index("year")
In [33]:
# Row subsets selected by the "plateau" and "steady" columns used as masks.
# NOTE(review): neither column is created in this file; presumably
# dv.prep_forhist adds them as booleans — confirm.
plateaus = gftip[gftip["plateau"]]
steady = gftip[gftip["steady"]]
In [36]:
# pcintensity values of the plateau rows, keyed by year label (2010-2013, in order).
plateau_values = OrderedDict(
    (str(yr), plateaus.loc[yr]["pcintensity"].tolist())
    for yr in range(2010, 2014)
)
In [37]:
# pcintensity values of the steady rows, keyed by year label (2010-2013, in order).
steady_values = OrderedDict(
    (str(yr), steady.loc[yr]["pcintensity"].tolist())
    for yr in range(2010, 2014)
)
In [324]:
# pc100 values of all rows, keyed by year label (2010-2013, in order).
intensity_values = OrderedDict(
    (str(yr), gftip.loc[yr]["pc100"].tolist())
    for yr in range(2010, 2014)
)
In [408]:
# Number of pc100 observations collected for 2013.
len(intensity_values["2013"])
Out[408]:
In [350]:
# First four colors of the 5-color BuPu palette, in reverse order.
# The slice makes a new list, so the palette stored in `brewer` is not modified.
colors = brewer["BuPu"][5][0:4][::-1]
In [425]:
# Histogram of pc100 with one series per year, written to `fname`.
# fname = "2012 had inttarget vs int change.html"
fname = "12intchangeyear.html"
title = "Annual Percent Intensity Change"
hist = Histogram(
    intensity_values,
    bins=40,
    filename=fname,
    title=title,
    ylabel="Density",
    xlabel="% Change",
    legend=True,
)
# hist = Histogram(hiall_values, bins=30, filename=fname, title = title, ylabel = "", xlabel = "",legend=True)
In [426]:
# Render the intensity-change histogram.
show(hist)
In [ ]: